#|output: false
#|cache: false
source("utils.R")
load("data/03.doc_from_env_nested_cv.Rdata")Generate projections of DOC.
Apply the model to new environmental data to get a world map of POC projections.
Set-up and load data
Extract projections
# Unnest new predictions (i.e. projections)
new_preds <- res %>%
select(fold, cv_type, new_preds) %>%
unnest(new_preds) %>%
# Apply exp to predictions as we predicted log(doc)
mutate(pred_doc = exp(pred_doc_log), .after = pred_doc_log) %>%
select(cv_type, fold, lon, lat, contains("doc"))
# Get those from stratified CV
new_preds_strat <- new_preds %>% filter(cv_type == "stratified")
# And those from spatial CV
new_preds_spat <- new_preds %>% filter(cv_type == "spatial")Join projections with R² value for each fold and each cv_type.
# Unnest predictions on test set
preds <- res %>% select(fold, cv_type, preds) %>% unnest(preds)
# Compute Rsquare for each fold of each CV type
rsquares <- preds %>%
group_by(cv_type, fold) %>%
rsq(truth = doc_log, estimate = .pred) %>%
select(cv_type, fold, rsq = .estimate)
# Join with projections
new_preds_strat <- new_preds_strat %>% left_join(rsquares, by = join_by(cv_type, fold))
new_preds_spat <- new_preds_spat %>% left_join(rsquares, by = join_by(cv_type, fold))Generate maps
For both types of CV, we use all the folds to generate and average prediction, weighted by R². Similarly, we also compute a weighted standard deviation.
Average by pixel
# Stratified
strat_proj <- new_preds_strat %>%
group_by(lon, lat) %>%
summarise(
doc_avg = wtd.mean(pred_doc, weights = rsq, na.rm = TRUE),
doc_sd = sqrt(wtd.var(pred_doc, weights = rsq, na.rm = TRUE)),
.groups = "drop"
)
# Spatial
spat_proj <- new_preds_spat %>%
group_by(lon, lat) %>%
summarise(
doc_avg = wtd.mean(pred_doc, weights = rsq, na.rm = TRUE),
doc_sd = sqrt(wtd.var(pred_doc, weights = rsq, na.rm = TRUE)),
.groups = "drop"
)
# Generate common colour bar limits for both CV types
doc_avg_lims <- c(
min(c(strat_proj$doc_avg, spat_proj$doc_avg)),
max(c(strat_proj$doc_avg, spat_proj$doc_avg))
)
doc_sd_lims <- c(
min(c(strat_proj$doc_sd, spat_proj$doc_sd)),
max(c(strat_proj$doc_sd, spat_proj$doc_sd))
)Let’s now plot maps with a common colour scale for both CV types.
Stratified CV
ggplot(strat_proj) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = doc_avg)) +
ggplot2::scale_fill_viridis_c(option = "F", limits = doc_avg_lims) +
labs(title = "DOC avg from stratified CV") +
coord_quickmap(expand = 0)ggplot(strat_proj) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = doc_sd)) +
ggplot2::scale_fill_viridis_c(trans = "log1p", option = "E", limits = doc_sd_lims) +
labs(title = "DOC sd from stratified CV") +
coord_quickmap(expand = 0)Btw, how do projections vary across CV folds? Let’s plot it.
ggplot(new_preds_strat) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = pred_doc)) +
ggplot2::scale_fill_viridis_c(option = "F") +
labs(title = "DOC pred across stratified CV folds") +
coord_quickmap(expand = 0) +
facet_wrap(~fold)Folds are relatively similar, not surprising given stratified CV was constrained on deciles of DOC values.
Spatial CV
ggplot(spat_proj) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = doc_avg)) +
ggplot2::scale_fill_viridis_c(option = "F", limits = doc_avg_lims) +
labs(title = "DOC avg from spatial CV") +
coord_quickmap(expand = 0)ggplot(spat_proj) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = doc_sd)) +
ggplot2::scale_fill_viridis_c(trans = "log1p", option = "E", limits = doc_sd_lims) +
labs(title = "DOC sd from spatial CV") +
coord_quickmap(expand = 0)Sd across folds is higher for spatial CV, we can expect more variation across folds, let’s plot it.
ggplot(new_preds_spat) +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = pred_doc)) +
ggplot2::scale_fill_viridis_c(option = "F") +
labs(title = "DOC pred across spatial CV folds") +
coord_quickmap(expand = 0) +
facet_wrap(~fold)As expected, more variation can be detected across folds.
Check the difference between stratified and spatial CV
strat_proj %>%
select(lon, lat, strat = doc_avg) %>%
left_join(spat_proj %>% select(lon, lat, spat = doc_avg), by = join_by(lon, lat)) %>%
mutate(diff = strat - spat) %>%
ggplot() +
geom_polygon(data = world, aes(x = lon, y = lat, group = group), fill = "grey") +
geom_raster(aes(x = lon, y = lat, fill = diff)) +
scale_fill_gradient2(low = "#4575b4", mid = "#ffffbf", high = "#d73027") +
labs(title = "DOC stratified CV - DOC spatial CV") +
coord_quickmap(expand = 0)No obvious difference between both (except in the North Sea), likely due to averaging across folds.